################################################################################
## Group Identities and Parliamentary Debates: Replication package
## Fiva, Nedregård and Øien (2025)

# Description:

## Code to make Figure A.9: "Fraction of politicians with white-collar background 
## by policy area and parliamentary session"
################################################################################



# Packages

library(data.table)
library(dplyr)
library(ggplot2)
library(showtext)
library(stringr)

# Directories (wd is set by master.R)

dir      <-  "../data/2_processed_data"
fig.dir  <-  "../results/figures"

# data

# Read the processed speech data. The steps below only use the columns
# `policy_com`, `session`, `pid_session` and `occupation`; the `committee`
# column is not needed for this figure, so no helper column is derived from it.
d <- fread(file.path(dir, "speeches_session_lemma.csv"))

# data cleaning

# Session year = the four leading digits of `session` (kept as character here).
d[, session_year := str_extract(session, "^\\d{4}")]

# `policy_com` holds one or more policy areas separated by "|"; split it into
# a list column with one character vector per row.
d[, policy_committee_list := str_split(policy_com, "\\|")]

# Reshape to long format: one row per (pid_session, session_year, occupation)
# group and policy area. Only the grouping columns and `policy_com` survive.
d <- d[
  ,
  .(policy_com = unlist(policy_committee_list)),
  by = .(pid_session, session_year, occupation)
]

# Strip surrounding whitespace from the policy area labels and flag rows whose
# occupation is exactly "white" (1 = white-collar, 0 = otherwise).
d[, `:=`(
  policy_com = str_trim(policy_com),
  white      = as.numeric(occupation == "white")
)]


## White-collar share across sessions

# Mean of the 0/1 `white` indicator within each session year and policy area,
# i.e. the fraction of rows with a white-collar occupation.
dt.plot <- d[, .(white.share = mean(white)), by = c("session_year", "policy_com")]
policy_com_names <- sort(unique(dt.plot$policy_com))

## Adding total

# Same fraction per session year across all policy areas, labelled "Total".
# The mean is taken over the rows of `d` as they stand at this point, so an
# entry that appears under several policy areas contributes once per area.
dt.plot.total <- d[, .(white.share = mean(white), policy_com = "Total"), by = "session_year"]

# The two tables have their columns in a different order, so bind by name.
dt.plot <- rbindlist(list(dt.plot, dt.plot.total), use.names = TRUE)
policy_com_names <- c(policy_com_names, "Total")

# Factor version of the label: policy areas in sorted order, "Total" last.
# The level order determines the order of the facets in the plot.
dt.plot[, Policy_com := factor(policy_com, levels = policy_com_names)]

## Plotting

# Register the Montserrat font under the family name used in the theme below
# and enable showtext rendering for the graphics devices opened afterwards.
font_add_google(name = "Montserrat", family = "Montserrat")
showtext_auto()

# One panel per policy area (plus "Total"): white-collar fraction by session
# year, drawn as a line with points. `Policy_com` is already a factor, so it is
# used directly as the facetting variable and its level order sets panel order.
p <- dt.plot |>
  ggplot(aes(x = as.numeric(session_year), y = white.share, group = Policy_com)) +
  facet_wrap(~ Policy_com, scales = "fixed", axes = "all_x") +
  geom_line() +
  geom_point(size = 2, color = "black") +
  theme(
    # Blank out all grid lines, then restore faint horizontal major lines only.
    panel.grid = element_blank(),
    panel.border = element_blank(),
    panel.grid.major.y = element_line(linewidth = 0.02, colour = "grey90"),
    panel.background = element_rect(fill = "transparent"),
    plot.background = element_rect(fill = "transparent"),
    plot.title = element_text(
      family = "Montserrat", color = "black", size = 8, hjust = 0.5,
      margin = margin(0, 0, 30, 0)
    ),
    panel.grid.major.x = element_blank(),
    strip.text = element_text(family = "Montserrat", color = "black", size = 10.2),
    strip.background = element_blank(),
    axis.ticks.length = unit(0, "lines"),
    axis.text.y = element_text(family = "Montserrat", size = 10),
    axis.title.y = element_text(
      family = "Montserrat", size = 13, margin = margin(0, 30, 0, 0)
    ),
    axis.text.x = element_text(
      family = "Montserrat", size = 10, angle = 45, hjust = 1
    ),
    axis.title.x = element_text(
      family = "Montserrat", size = 13, margin = margin(30, 0, 0, 0)
    ),
    panel.spacing = unit(2, "lines")
  ) +
  # Fractions live on [0, 1]; x-axis breaks every four years from 1981 to 2021.
  scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .25)) +
  scale_x_continuous(breaks = seq(1981, 2021, 4)) +
  labs(title = "", x = "Election year", y = "Fraction")

# Save the plot object explicitly rather than relying on the last plot created.
ggsave(
  file.path(fig.dir, "figA9.pdf"),
  plot = p, width = 12.5, height = 12, pointsize = 12
)









